# coding:utf8
import jieba


def context_jieba(strdata) -> list :
    """Tokenize ``strdata`` with jieba and return the tokens as a list.

    The text is passed to ``jieba.cut_for_search`` and everything the
    resulting iterable yields is collected, in order, into a new list.
    """
    return list(jieba.cut_for_search(strdata))

def filter_words(strdata) -> bool:
    """Return True when ``strdata`` should be kept.

    A token is rejected (False) only if it is exactly one of the three
    single-character tokens listed below; anything else is kept.
    """
    excluded = ('谷', '帮', '客')
    is_excluded = strdata in excluded
    return not is_excluded

def append_words(strdata) -> tuple:
    """Expand a known word fragment and pair the result with ``1``.

    Three specific fragments are replaced by their longer form; any other
    value is passed through unchanged. The return value is always a
    2-tuple ``(word, 1)``.
    """
    # (fragment, full word) pairs, checked in order with plain equality.
    completions = (
        ('传智播', '传智播客'),
        ('院校', '院校帮'),
        ('博学', '博学谷'),
    )
    for fragment, full_word in completions:
        if strdata == fragment:
            return (full_word, 1)
    return (strdata, 1)

def extract_user_and_word(data):
    """Build ``("<user>_<word>", 1)`` pairs from one ``data`` record.

    ``data[0]`` is used as the user identifier and ``data[1]`` as the text
    to tokenize. The text is split with ``context_jieba``; tokens rejected
    by ``filter_words`` are dropped, and each remaining token is expanded
    via ``append_words`` before being joined to the user identifier with
    an underscore.

    Returns a list of ``(key, 1)`` tuples, one per kept token, in token
    order.
    """
    user_id = data[0]
    content = data[1]
    return [
        (user_id + '_' + append_words(token)[0], 1)
        for token in context_jieba(content)
        if filter_words(token)
    ]















